* Session setup. Order matters: wipe the session first, then adjust settings.
* Remove data and other objects left over from earlier work in this session.
clear all
* Do not pause long output at --more-- prompts.
set more off
* NOTE(review): recent Stata releases manage memory automatically, so this line
* is probably a no-op under version 13 -- confirm before removing it.
set mem 10000000
* Raise the maximum matrix dimension.
set matsize 10000
* Interpret every command below using Stata 13 syntax and behavior.
version 13

****************************************************************** 
*** Build File to Process Raw Census 2011 HPCA *******************
****************************************************************** 

** Set file paths
* Runs paths.do from the folder held in the global $path_code; that global is not
* set anywhere in this file, so it has to exist before this file is run.
* Presumably paths.do defines $hpca11, which the code below uses -- confirm.
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

** 2011 HPCA

* Insheet and append all the downloaded csv files
* Every *.csv in the scrape folder is read, stripped of its first row and its
* first column, de-duplicated, given a provisional string ID in v1, and then
* stacked onto the data already accumulated in memory.
{
clear
cd "$hpca11/python scraped csvs"
local myfilelist : dir "." files "*.csv"

* Stop here with a clear message rather than failing further down with an
* unrelated "variable not found" error when the folder holds no csv files.
if `"`myfilelist'"' == "" {
  display as error "no csv files found in $hpca11/python scraped csvs"
  error 601
}

* Scratch file for one cleaned csv at a time; Stata erases tempfiles on its own
* when the do-file finishes, so nothing is left behind in the data folder.
tempfile chunk

foreach filename of local myfilelist {
  * Set the accumulated data aside while the next file is cleaned.
  preserve
  * Compound double quotes keep file names that contain spaces in one piece.
  insheet using `"`filename'"', comma double clear
  drop in 1
  drop v1
  duplicates drop
  * Provisional ID of the form <v2>-<v4>-<row number within this file>.
  gen v1 = _n
  tostring v1, replace
  replace v1 = string(v2) + "-" + string(v4) + "-" + v1
  order v1
  save "`chunk'", replace
  * Bring the accumulated data back and add the file just cleaned.
  restore
  append using "`chunk'"
}
duplicates drop
}

* Rename variables
* Columns v1-v146 receive descriptive names; v12, v120 and v139 are discarded.
* Afterwards every indicator from t_hh_g through hh_unclassif is stored as a
* double, divided by 100 and given the prefix hpca11_.
{
* Target names in column order (v1, v2, ...). The placeholder __drop__ marks a
* column that is dropped instead of renamed. The last row covers v140-v146.
local hpca_names ///
    v_id_hpca11 st_code11 state dt_code11 district bk_code11 block vi_code11 ward_no area ru /// v1-v11
    __drop__ /// v12
    t_hh_g t_hh_l t_hh_d t_hh_r t_hh_r_g t_hh_r_l t_hh_r_d t_hh_r_o t_hh_rc_g t_hh_rc_l t_hh_rc_d /// v13-v23
    mat_r_gtbw mat_r_pp mat_r_hmt mat_r_mmt mat_r_bb mat_r_ss mat_r_gmas mat_r_c mat_r_aom /// v24-v32
    mat_w_gtb mat_w_pp mat_w_mub mat_w_w mat_w_snpwm mat_w_spwm mat_w_gimas mat_w_bb mat_w_c mat_w_aom /// v33-v42
    mat_f_m mat_f_wb mat_f_bb mat_f_st mat_f_c mat_f_mf mat_f_aom /// v43-v49
    room_0 room_1 room_2 room_3 room_4 room_5 room_6plus /// v50-v56
    h_size_1 h_size_2 h_size_3 h_size_4 h_size_5 h_size_6_8 h_size_9plus /// v57-v63
    owned rented not_owned_rented /// v64-v66
    coup_0 coup_1 coup_2 coup_3 coup_4 coup_5 /// v67-v72
    dw_tfts dw_tfus dw_cw dw_uw dw_hp dw_tb dw_sp dw_rc dw_tpl dw_os /// v73-v82
    w_within w_near w_away /// v83-v85
    msl_elec msl_ker msl_sol msl_oo msl_ao msl_nl /// v86-v91
    lat_premise lat_pss lat_st lat_os lat_pit_svi lat_pit_sop lat_drain lat_nrh lat_nsa /// v92-v100
    no_lat no_lat_alt no_lat_no_alt /// v101-v103
    bath bath_ewr bath_no /// v104-v106
    ww_cd ww_od ww_nd /// v107-v109
    cook_fw cook_cr cook_cc cook_clc cook_ker cook_lpg_png cook_elec cook_biog cook_ao cook_no /// v110-v119
    __drop__ /// v120
    cook_ih cook_ih_kitchen cook_ih_no_kitchen cook_oh cook_oh_kitchen cook_oh_no_kitchen cook_no2 /// v121-v127
    bank /// v128
    assets_rt assets_tv assets_cl_int assets_cl_no_int assets_tel_l assets_tel_m assets_tel_b /// v129-v135
    assets_bic assets_smm assets_cjv /// v136-v138
    __drop__ /// v139
    assets_none hh_perm hh_semi_perm hh_temp_total hh_temp_serv hh_temp_nonserv hh_unclassif

* Walk the list and the columns in step: the k-th entry applies to column vk.
local col = 0
foreach target of local hpca_names {
    local ++col
    if "`target'" == "__drop__" {
        drop v`col'
    }
    else {
        rename v`col' `target'
    }
}

* Rescale every indicator and mark it as coming from the 2011 HPCA.
foreach indicator of varlist t_hh_g-hh_unclassif {
    recast double `indicator'
    replace `indicator' = `indicator'/100  // to rescale all vars between 0 and 1
    rename `indicator' hpca11_`indicator'
}
}

* Label variables
* Attaches a descriptive label to every identifier and hpca11_* indicator.
* NOTE(review): the labels read "% HH", but the indicators were divided by 100
* earlier in this file, so the stored values are shares rather than percentage
* points -- keep that in mind when reading output.
{
la var v_id_hpca11 "2011 HPCA unique identifier"
la var st_code11 "State Code (2011)"
la var state "State Name"
la var dt_code11 "District Code (2011)"
la var district "District Name"
la var bk_code11 "Block Code (2011)"
la var block "Block Name"
la var vi_code11 "Village code (2011)"
la var ward_no "Ward No"
la var area "Area Name"
la var ru "Rural/Urban"
la var hpca11_t_hh_g "% HH that are 'good'"
la var hpca11_t_hh_l "% HH that are 'livable'"
la var hpca11_t_hh_d "% HH that are 'dilapidated'"
la var hpca11_t_hh_r "% HH that are 'Residences'"
la var hpca11_t_hh_r_g "% residence households that are 'good'"
la var hpca11_t_hh_r_l "% residence households that are 'livable'"
la var hpca11_t_hh_r_d "% residence households that are 'dilapidated'"
la var hpca11_t_hh_r_o "% HH that are 'Residence-cum-other'"
la var hpca11_t_hh_rc_g "% residence-cum-other households that are 'good'"
la var hpca11_t_hh_rc_l "% residence-cum-other households that are 'livable'"
la var hpca11_t_hh_rc_d "% residence-cum-other households that are 'dilapidated'"
la var hpca11_mat_r_gtbw "% HH with roof made of grass/thatch/bamboo/wood/mud"
la var hpca11_mat_r_pp "% HH with roof made of plastic/polythene"
la var hpca11_mat_r_hmt "% HH with roof made of hand-made tile"
la var hpca11_mat_r_mmt "% HH with roof made of machine-made tile"
la var hpca11_mat_r_bb "% HH with roof made of burnt brick"
la var hpca11_mat_r_ss "% HH with roof made of stone/slate"
la var hpca11_mat_r_gmas "% HH with roof made of GI/metal/asbestos sheets"
la var hpca11_mat_r_c "% HH with roof made of concrete"
la var hpca11_mat_r_aom "% HH with roof made of any other material"
la var hpca11_mat_w_gtb "% HH with walls made of grass/thatch/bamboo"
la var hpca11_mat_w_pp "% HH with walls made of plastic/polythene"
la var hpca11_mat_w_mub "% HH with walls made of mud/unburnt brick"
la var hpca11_mat_w_w "% HH with walls made of wood"
la var hpca11_mat_w_snpwm "% HH with walls made of stone not packed with mortar"
la var hpca11_mat_w_spwm "% HH with walls made of stone packed with mortar"
la var hpca11_mat_w_gimas "% HH with walls made of GI/metal/asbestos sheets"
la var hpca11_mat_w_bb "% HH with walls made of burnt brick"
la var hpca11_mat_w_c "% HH with walls made of concrete"
la var hpca11_mat_w_aom "% HH with walls made of any other material"
la var hpca11_mat_f_m "% HH with floor made of mud"
la var hpca11_mat_f_wb "% HH with floor made of wood/bamboo"
la var hpca11_mat_f_bb "% HH with floor made of burnt brick"
la var hpca11_mat_f_st "% HH with floor made of stone"
la var hpca11_mat_f_c "% HH with floor made of cement"
la var hpca11_mat_f_mf "% HH with floor made of mosaic floor tiles"
la var hpca11_mat_f_aom "% HH with floor made of any other material"
la var hpca11_room_0 "% HH with no exclusive dwelling room"
la var hpca11_room_1 "% HH with 1 room"
la var hpca11_room_2 "% HH with 2 rooms"
la var hpca11_room_3 "% HH with 3 rooms"
la var hpca11_room_4 "% HH with 4 rooms"
la var hpca11_room_5 "% HH with 5 rooms"
la var hpca11_room_6plus "% HH with 6+ rooms"
la var hpca11_h_size_1 "% HH with 1 person"
la var hpca11_h_size_2 "% HH with 2 people"
la var hpca11_h_size_3 "% HH with 3 people"
la var hpca11_h_size_4 "% HH with 4 people"
la var hpca11_h_size_5 "% HH with 5 people"
la var hpca11_h_size_6_8 "% HH with 6-8 people"
la var hpca11_h_size_9plus "% HH with 9+ people"
la var hpca11_owned "% HH that are owned"
la var hpca11_rented "% HH that are rented"
la var hpca11_not_owned_rented "% HH with other ownership status"
la var hpca11_coup_0 "% HH with 0 married couples"
la var hpca11_coup_1 "% HH with 1 married couple"
la var hpca11_coup_2 "% HH with 2 married couples"
la var hpca11_coup_3 "% HH with 3 married couples"
la var hpca11_coup_4 "% HH with 4 married couples"
la var hpca11_coup_5 "% HH with 5+ married couples"
la var hpca11_dw_tfts "% HH with tap water from treated source"
la var hpca11_dw_tfus "% HH with tap water from untreated source"
la var hpca11_dw_cw "% HH with water from covered well"
la var hpca11_dw_uw "% HH with water from uncovered well"
la var hpca11_dw_hp "% HH with water from handpump"
la var hpca11_dw_tb "% HH with water from tubewell"
la var hpca11_dw_sp "% HH with water from spring"
la var hpca11_dw_rc "% HH with water from river/canal"
la var hpca11_dw_tpl "% HH with water from tank/pond/lake"
la var hpca11_dw_os "% HH with water from other sources"
la var hpca11_w_within "% HH with water within premises"
la var hpca11_w_near "% HH with water near premises"
la var hpca11_w_away "% HH with water far from premises"
la var hpca11_msl_elec "% HH with electricity as main lighting source"
la var hpca11_msl_ker "% HH with kerosene as main lighting source"
la var hpca11_msl_sol "% HH with solar energy as main lighting source"
la var hpca11_msl_oo "% HH with other oil as main lighting source"
la var hpca11_msl_ao "% HH with any other main lighting source"
la var hpca11_msl_nl "% HH with no lighting"
la var hpca11_lat_premise "% HH with latrine available within premises"
la var hpca11_lat_pss "% HH with piped sewer system"
la var hpca11_lat_st "% HH with septic tank"
la var hpca11_lat_os "% HH with other system for flush/pour latrine"
la var hpca11_lat_pit_svi "% HH with slab/ventilated pit latrine"
la var hpca11_lat_pit_sop "% HH with open pit latrine"
la var hpca11_lat_drain "% HH with night soil disposed in drain"
la var hpca11_lat_nrh "% HH with night soil removed by human"
la var hpca11_lat_nsa "% HH with night soil serviced by animal"
la var hpca11_no_lat "% HH without latrine on premises"
la var hpca11_no_lat_alt "% HH with no latrine and alternate public latrine"
la var hpca11_no_lat_no_alt "% HH with no latrine and no alternative latrine"
la var hpca11_bath "% HH with bathing room available"
la var hpca11_bath_ewr "% HH with bathing in enclosure without roof "
la var hpca11_bath_no "% HH with no bathing room"
la var hpca11_ww_cd "% HH with closed drainage for waste water"
la var hpca11_ww_od "% HH with open drainage for waste water"
la var hpca11_ww_nd "% HH with no drainage for waste water"
la var hpca11_cook_fw "% HH cooking with firewood"
la var hpca11_cook_cr "% HH cooking with crop residue"
la var hpca11_cook_cc "% HH cooking with cowdung cake"
la var hpca11_cook_clc "% HH cooking with coal/lignite/charcoal"
la var hpca11_cook_ker "% HH cooking with kerosene"
la var hpca11_cook_lpg_png "% HH cooking with LPG/PNG"
la var hpca11_cook_elec "% HH cooking with electricity"
la var hpca11_cook_biog "% HH cooking with biogas"
la var hpca11_cook_ao "% HH cooking with any other fuel source"
la var hpca11_cook_no "% HH with no cooking"
la var hpca11_cook_ih "% HH with cooking in home"
la var hpca11_cook_ih_kitchen "% HH with kitchen in home"
la var hpca11_cook_ih_no_kitchen "% HH with in-home cooking but no kitchen"
la var hpca11_cook_oh "% HH with cooking outside of home"
la var hpca11_cook_oh_kitchen "% HH with kitchen outside of home"
la var hpca11_cook_oh_no_kitchen "% HH with cooking outside of home and no kitchen"
la var hpca11_cook_no2 "% HH with no cooking"
la var hpca11_bank "% HH availing banking services"
la var hpca11_assets_rt "% HH with radio/transistor"
la var hpca11_assets_tv "% HH with television"
la var hpca11_assets_cl_int "% HH with computer/laptop with internet"
la var hpca11_assets_cl_no_int "% HH with computer/laptop without internet"
la var hpca11_assets_tel_l "% HH with landline phone only "
la var hpca11_assets_tel_m "% HH with mobile phone only"
la var hpca11_assets_tel_b "% HH with both landline and mobile phones"
la var hpca11_assets_bic "% HH with bicycle"
la var hpca11_assets_smm "% HH with scooter/motorcycle/moped"
la var hpca11_assets_cjv "% HH with car/jeep/van"
la var hpca11_assets_none "% HH with none of the specified assets"
la var hpca11_hh_perm "% HH that are permanent"
la var hpca11_hh_semi_perm "% HH that are semi-permanent"
la var hpca11_hh_temp_total "% HH that are temporary"
la var hpca11_hh_temp_serv "% HH that are temporary and serviceable"
la var hpca11_hh_temp_nonserv "% HH that are temporary and nonserviceable"
la var hpca11_hh_unclassif "% HH that have unclassifiable structures"
}


* Clean and save as 2011 HPCA
* Produces the rural, village-level file: numeric row ID, tidied upper-case
* names, aggregate/urban/excluded-state rows removed, saved to hpca11.dta.

* Swap the provisional string ID for a running row number. An explicit format
* is given because string() with its default format can fall back to
* scientific notation for very large values, which would corrupt the ID.
replace v_id_hpca11 = string(_n, "%12.0f")
destring v_id_hpca11, replace

* Remove leftover "amp;" fragments, collapse repeated blanks, trim, upper-case.
foreach namevar of varlist state district block area {
    replace `namevar' = upper(trim(itrim(subinstr(`namevar',"amp;","",.))))
}

* Drop sub-district, district and state summary rows. area was upper-cased just
* above and regexm() is case-sensitive, so the patterns must be upper-case too;
* the earlier mixed-case patterns could never match.
drop if vi_code11==0 & regexm(area,"SUB-DIST")==1
drop if vi_code11==0 & regexm(area,"DISTRICT -")==1
drop if vi_code11==0 & regexm(area,"STATE -")==1

rename area village
la var village "Village name"

* Keep rural village records only.
drop if ru=="Urban"
drop if vi_code11==0
* Full variable name, so this does not depend on abbreviation being enabled.
drop ru ward_no
drop if inlist(st_code11,4,7,25,26,30,31,34,35)  // drop non-RGGVY states
duplicates drop
compress
save "$hpca11/hpca11.dta", replace

